cd ""  ///change directory for where files are kept

use "project data.dta", clear //this is the cleaned replication data from PNAS project
set scheme plottig

* collaboration index
label define collab 0 "None (=0)" 1 "Opportunity (=1)" 2 "Understood(=2)" 3 "Partially/Fully Addressed (=3)"

*collaboration w/n community
gen collabcomm=(indcomminter==1) if indcomminter!=.
replace collabcomm=cond(indcomminter_3==1,1,collabcomm)
replace collabcomm=cond(indcomminter_4==1,2,collabcomm)
replace collabcomm=cond(indcomminter_4==2,3,collabcomm)
replace collabcomm=cond(indcomminter_4==3,3,collabcomm)

tab collabcomm, m
label var collabcomm "Within Community"
label values collabcomm collab

* collaboration w/ other actors
foreach x in oil locgovt cengovt cso {
	qui gen collab`x'=(comm`x'inter==1) if comm`x'inter!=.
	qui replace collab`x'=cond(comm`x'inter_3==1,1,collab`x')
	qui replace collab`x'=cond(comm`x'inter_4==1,2,collab`x')
	qui replace collab`x'=cond(comm`x'inter_4==2,3,collab`x')
	qui replace collab`x'=cond(comm`x'inter_4==3,3,collab`x')
	tab collab`x', m
	label values collab`x' collab
}

rename collabcomm collab1
rename collaboil collab2
rename collablocgovt collab3
rename collabcengovt collab4
rename collabcso collab5

reshape long collab, i(hhid) j(actor)
label define actor 1 "Within community" 2 "Oil Companies" 3 "Local Govt" 4 "Central Govt" 5 "CSOs"
label values actor actor
tab actor

label define endline 0 "Baseline" 1 "Endline"
label values endline endline

egen treatgroup=group(treat endline)
label define treatgroup 1 "Control/Baseline" 2 "Control/Endline" 3 "Treatment/Baseline" 4 "Treatment/Endline"
label values treatgroup treatgroup

graph bar (count), over(collab) over(treat, label(labsize(4-pt))) over(endline) ///
	missing stack ytitle(Frequency) ///
	///by(, title({bf:Distribution of Collaboration}, size(16-pt) color(white) span box fcolor(black) bexpand justification(left)) ///
	///subtitle("     by actor, data collection wave, and treatment assignment", size(10-pt) span bexpand justification(left)) ///
	by(, note("") legend(position(0) at(6))) xsize(5) ysize(4) ///
	///subtitle(, box lc(black) margin(small)) ///
	legend(title(" " " ") order(1 "None (=0)" 2 "Opportunties (=1)" 3 "Concerns Understood (=2)" 4 "Concerns Resolved (=3)" 5 "Missing data")) by(actor)
graph export figs/fig3.eps, replace

*significnat differences in collaboration across treatment/endline?
*Overall across all actors
estpost tabstat collab, by(treatgroup) statistics(mean sd) columns(statistics)

oneway collab treatgroup 
scalar anova_p = Ftail(r(df_m), r(df_r), r(F))
estadd scalar anova_p

kwallis collab, by(treatgroup)
scalar kwallis_p = chi2tail(r(df), r(chi2))
estadd scalar kwallis_p
estimates store means_all, title("Pooled")

meologit collab treat##endline i.actor || hhid: 
estimates store ord_all, title("Pooled")

local lab1 : label actor 1
di "`lab1'"

*seperately
forvalues i=1/5 {
	local lab : label (actor) `i'
	estpost tabstat collab if actor==`i', by(treatgroup) statistics(mean sd) columns(statistics)
	
	oneway collab treatgroup if actor==`i'
	scalar anova_p = Ftail(r(df_m), r(df_r), r(F))
	estadd scalar anova_p
	
	kwallis collab if actor==`i', by(treatgroup)
	scalar kwallis_p = chi2tail(r(df), r(chi2))
	estadd scalar kwallis_p
	estimates store means_`i', title("`lab'")
	
	meologit collab treat##endline if actor==`i' 
	estimates store ord_`i', title("`lab'")
}

estout means_1 means_2 means_3 means_4 means_5 means_all using "tables/means.rtf", replace ///
	cells(mean(fmt(3) star) sd(par fmt(2))) label ///
	stats(N anova_p kwallis_p, fmt(0 3 3) ///
	labels("N" "Anova p-value" "Kruskal-Wallis p-value")) ///
	varwidth(30)

estout ord_1 ord_2 ord_3 ord_4 ord_5 ord_all using "tables/ordregs.rtf", replace ///
	cells(b(fmt(3) star) p(par fmt(3))) collabels(none) eqlabels("Fixed Effects" "Cutpoints") ///
	stats(N chi2 ll aic bic, fmt(0 3 3 3 3) labels("N" "Chi-Squared" "Log-Likelihood" "AIC" "BIC")) ///
	varlabels(/:cut1 "Cutpoint 1" /:cut2 "Cutpoint 2" /:cut3 "Cutpoint 3" var(_cons[village]) "Var(Village)" var(_cons[village>hhid]) "Var(Household)", elist(/:cut3 "{break}{hline @width}")) ///
	nobaselevels label stardrop(*)

*these take a long time to run
estimates restore ord_all
margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(1))+predict(outcome(2))+predict(outcome(3))) post
estimates store ate1, title("Pr(y>0)")

estimates restore ord_all
margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(2))+predict(outcome(3))) post
estimates store ate2, title("Pr(y>1)")

estimates restore ord_all
margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(3))) post
estimates store ate3, title("Pr(y>2)")

coefplot (ate1, ms(circle) label("Pr(Collab > 1)")) ///
	(ate2, ms(D) label("Pr(Collab > 2)")) ///
	(ate3, ms(triangle) label("Pr(Collab = 3)")), ///
	msize(med) legend(size(*.75) position(4) ring(0) bmargin(0 0 20 0)) ///
	///title({bf:Main Effects for Full Sample}, span size(16-pt) justification(left) box bexpand color(white) bcolor(black)) ///
	xline(0) xlab(-.1(.1).4)  levels(95) xsize(5) ysize(5) ///
	mlabel format(%9.3f) mlabposition(2) mlabsize(vsmall) ///
	///subtitle("     Average Treatment Effects", size(10-pt) span bexpand justification(left)) ///
	note("Treatment effects are cumulative differences in the probaility of an outcome at endline, i.e. Pr(y>j|treatment, endline) - Pr(y>j|control, endline)," "with 95% confidence intervals.", size(tiny) span)  ///
	saving(treatefx.gph, replace)
gr_edit legend.xoffset = -5
graph export figs/treatefx.eps, replace	

estout ate1 ate2 ate3 using tables/ates.rtf, replace ///
	cells(b(fmt(3) star) p(par fmt(3))) ///
	label starlevels(* .1 ** .05 *** .01) keep(1.treat:) ///
	///title("Average Treatment Effects") collabels(none) eqlabels(none) ///
	stardrop(*)

* gonna do some machine learning for subgroup effects
* first define training data (40 random villages in both t/c)	
set seed 12035

egen vtag=tag(village) // tag one obs per village
gen rvill=runiform() if vtag==1 //random number for each village
egen rvillrank=rank(rvill), by(treat) //rank the order of random numbers for both t/c villages
tab rvillrank // ensures random order of each
egen rvillfill=mean(rvillrank), by(village) // fill in these ranks for each observatin w/n each village
gen train=rvillfill<=40
tab train if vtag==1 // data from 80 villages used to train, 27 villages used to verify

gen did=treat*endline
replace age=. if age==8
tab age, m
recode age (1=21) (2=30) (3=40) (4=50) (5=60) (6=70) (7=80), gen(agem)
gen agem2=agem*agem
gen p_assign=1/107
replace hhsize=. if hhsize==9
replace visitin=. if visitin==6
replace visitout=. if visitout==6
egen meandid=mean(did)

*demean the dep variables
egen groupinds=group(actor endline)
tab groupinds

egen collab_means=mean(collab), by(groupinds)
gen collab_demean=collab-collab_means
*hist collab_demean, discrete

gen idnum=_n
* issueranking_land issueranking_soc agem gender socialnetwork hhsize psclacc reside visitin visitout ownland hcacc
*export data to r and run the script for the grf package - 10,000 trees
saveold "data\longuga.dta", replace version(12)
*from the r script I have exported a results file to merge back into stata
*uses the following covariates for estimating heterogeneity
* actor (dummies for each of the five)
* issueranking_land, issueranking_soc, agem, gender, socialnetwork, hhsize, psclacc, visitin, visitout, ownland, hcacc

merge 1:1 idnum using "data\temp_grf_merge.dta", nogen

* also exported importance measures as separate stata dataset in /data/importance.dta
save tmpsave.dta
	use "data\importance.dta", clear
	destring importance, replace
	egen rank=rank(importance)
	sum rank
	local max=r(max)
	replace rank=`max'+1-rank
	sort rank
	forvalues i=1/`max' {
		label define lblname `i' "`: di variable[`i']'", add
	}
	label values rank lblname
	graph hbar (asis) importance, over(rank) ///
		///title({bf:Importance Estimates}, span size(16-pt) justification(left) box bexpand color(white) bcolor(black)) ///
		///subtitle("     estimated from Causal Forests", size(10-pt) span bexpand justification(left)) ///
		xsize(5) ysize(5)
	graph export figs/fig6.eps, replace
use tmpsave.dta, clear
erase tmpsave.dta

* look at estimated treatment effects across samples
label var tau "Estimated Treatment Effect"
label var tause "Estimated TE Standard Error"
label define train 1 "Training Data" 0 "Validation Data"
label values train train
sum tau if train==1
local meantau=round(r(mean),.001)
di `meantau'
twoway (hist tau if train==1, color(black)) ///
	(hist tau if train==0, color(blue) fc(%20)), ///
	xline(`meantau', lw(thick)) text(8 `meantau' "Mean = `meantau'", placement(east)) ///
	legend(order(1 "Training Data" 2 "Validation Data")) ///
	title("Distribution of Estimated Treatment Effects", size(12-pt) color(white) span box fcolor(black) bexpand justification(left)) ///
	legend(position(3) ring(0)) xsize(5) ysize(5)
graph export figs/histtreat.eps, replace

gen ul_tau=1.96*tause+tau
gen ll_tau=-1.96*tause + tau

sort tau
gen id=_n
twoway (rbar ul_tau ll_tau id, lc(gs1%10) lw(vthin)) ///
	(line tau id, lw(thin)), ///
	by(train) ///
	///by(, title("Distribution of Estimated Treatment Effects", size(12-pt) color(white) span box fcolor(black) bexpand justification(left))) ///
	by(,legend(off) note("Point estimates and 95% confidence intervals")) ///
	subtitle(, box lc(black) margin(small)) ///
	yline(0) ylab(-.1(.1).4) xsize(5) ysize(5) xsc(off) xlab(none) xtitle("")
graph export figs/fig5.eps, replace

*Most important variables: land, social services, visitin, visitout
twoway (scatter tau issueranking_land if train==1, jitter(5) ms(oh) mc(%20)) ///
	(fpfit tau issueranking_land if train==1, lw(thick)), ytitle("Estimated Treatment Effect") ///
	legend(off) saving(land.gph, replace) xtitle("") ///
	title("Issue Ranking-Land", size(8-pt) box bexpand lc(black) margin(small))
twoway (scatter tau issueranking_soc if train==1, jitter(5) ms(oh) mc(%20)) ///
	(fpfit tau issueranking_soc if train==1, lw(thick)), ytitle("Estimated Treatment Effect") ///
	legend(off) saving(soc.gph, replace) xtitle("") ///
	title("Issue Ranking-Social Services", size(8-pt) box bexpand lc(black) margin(small))
twoway (scatter tau visitin if train==1, jitter(5) ms(oh) mc(%20)) ///
	(fpfit tau visitin if train==1, lw(thick)), ytitle("Estimated Treatment Effect") ///
	legend(off) saving(visitin.gph, replace) xtitle("") xlab(,valuelabel labsize(tiny)) ///
	title("Visit In Freq", size(8-pt) box bexpand lc(black) margin(small))
twoway (scatter tau visitout if train==1, jitter(5) ms(oh) mc(%20)) ///
	(fpfit tau visitout if train==1, lw(thick)), ytitle("Estimated Treatment Effect") ///
	legend(off) saving(visitout.gph, replace) xtitle("") xlab(,valuelabel labsize(tiny)) ///
	title("Visit Out Freq", size(8-pt) box bexpand lc(black) margin(small))

graph combine land.gph soc.gph visitin.gph visitout.gph, cols(2) ycommon ysize(5) xsize(5) ///
	/// title("{bf:Heterogenous Treatment Effects}", span size(12-pt) justification(left) box bexpand color(white) bcolor(black)) ///
	///	subtitle("     from 4 most important variables.", size(4-pt) span bexpand justification(left)) ///
	note("Local polynomial fit on estimated treatment effects of the training data", size(tiny)) 
graph export figs/fig7.eps, replace 

*** TARGETING ***
*----------------
egen medtau=median(tau) if endline==0, by(village)
egen target=rank(medtau) if train==0 & endline==0 & vtag==1, unique
replace target=28-target // this will put the largest te first in priority
label var target "Target Priority Village"
egen targetall=mean(target) if endline==0, by(village)
label var targetall "Village Priority of Household"
gen tau_treat=tau if treat==1
label var tau_treat "Treatment"
gen tau_cont=tau if treat==0
label var tau_cont "Control"
graph box tau_treat tau_cont, boxgap(-25) over(targetall) nofill ///
	medtype(cline) medline(lwidth(thick)) ///
	ytitle("Distribution of Est Treatment Effects w/n Village") ///
	title("Target Priority Rank of Village", size(12-pt) color(white) span box fcolor(black) bexpand justification(left)) ///
	note(" " "Note: Villages ranked on median estimated treatment effect from baseline data.", span) ///
	legend(title("Actual Treatment" "Assignment", size(small))) ///
	xsize(5) ysize(3)
graph export figs/priority_box.eps, replace

egen idtag=tag(hhid)

egen rankwn=rank(tau) if idtag==1 & endline==0 & targetall!=., by(targetall) unique
replace rankwn=30*rankwn
gen ranst=string(rankwn)
replace ranst="0" +string(rankwn) if rankwn<100

egen rankplot=concat(targetall ranst) if rankwn!=. & targetall!=., punct(".")
destring rankplot, replace
replace rankplot=rankplot-.4

twoway	(scatter medtau rankplot, ms(square) msize(vsmall) mc(plg1)) ///
	(scatter tau rankplot if treat==0, mc(black%40)) ///
	(rspike ul_tau ll_tau rankplot if treat==0, lcolor(black%20)) ///
	(scatter tau rankplot if treat==1, mc(plb1%40) ms(triangle)) ///
	(rspike ul_tau ll_tau rankplot if treat==1, lcolor(plb1%20)), ///
	xlabel(.5(1)27.5, nolabels noticks) xmtick(1(1)27, labels)  ///
	ylabe(-.1(.1).4) ///
	xtitle(" " "Village Priority Ranking (Median Effect)") ytitle("Household Treatment Effects") ///
	note("Note: Estimated treatment effects from vaidation data at baseline. 95% confidence intervals also reported.", size(tiny)) ///
	ysize(2) xsize(5) legend(order(1 "Village Median" 2 "Assigned to Control" 4 "Assigned to Treatment") position(10) ring(0)) ///
	///title("Estimated Treatment Effects by Village Priority", size(10-pt) color(white) span box fcolor(black) bexpand justification(left))

graph export figs/fig8.eps, replace

* estimate ates for the top 10 villages with highest median expected treatments (using baseline data)
drop target
egen target=mean(targetall), by(village)
gen top10=target<11

meologit collab treat##endline i.actor if top10==1 || hhid:
estimates store top10, title("Top 10 Priority")

	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(1))+predict(outcome(2))+predict(outcome(3))) post
	estimates store top10ate1, title("Pr(y>0)")

	estimates restore top10
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(2))+predict(outcome(3))) post
	estimates store top10ate2, title("Pr(y>1)")

	estimates restore top10
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(3))) post
	estimates store top10ate3, title("Pr(y>2)")

	coefplot (top10ate1, ms(circle) label("Pr(Collab > 1)")) ///
		(top10ate2, ms(D) label("Pr(Collab > 2)")) ///
		(top10ate3, ms(triangle) label("Pr(Collab = 3)")), ///
		msize(med) legend(off) ///
		title({bf:Top 10 Priority Villages}, span size(16-pt) justification(left) box bexpand color(white) bcolor(black)) ///
		xline(0) xlab(-.1(.1).4) levels(95) xsize(5) ysize(5) ///
		mlabel format(%9.3f) mlabposition(2) mlabsize(vsmall) ///
		///subtitle("     Average Treatment Effects", size(10-pt) span bexpand justification(left))  ///
		saving(top10ate.gph, replace)
	
	graph export figs/top10ate.eps, replace

* estimate ates for 10 random villages 
set seed 12002
gen ranunif = runiform() if vtag==1 & train==0
egen ranrank=rank(ranunif), by(treat)
tab ranrank treat
gen ran10=ranrank<=5 if ranrank!=. //randomly select 5 treat and 5 control
egen ran10all=median(ran10) if train==0, by(village)
tab village ran10all

meologit collab treat##endline i.actor if ran10all==1 || hhid:
estimates store ran10, title("Random 10 villages")

	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(1))+predict(outcome(2))+predict(outcome(3))) post
	estimates store ran10ate1, title("Pr(y>0)")

	estimates restore ran10
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(2))+predict(outcome(3))) post
	estimates store ran10ate2, title("Pr(y>1)")

	estimates restore ran10
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(3))) post
	estimates store ran10ate3, title("Pr(y>2)")

	coefplot (ran10ate1, ms(circle) label("Pr(Collab > 1)")) ///
		(ran10ate2, ms(D) label("Pr(Collab > 2)")) ///
		(ran10ate3, ms(triangle) label("Pr(Collab = 3)")), ///
		msize(med) legend(off) ///
		title({bf:Random 10 Villages}, span size(16-pt) justification(left) box bexpand color(white) bcolor(black)) ///
		xline(0) xlab(-.1(.1).4)  levels(95) xsize(5) ysize(5) ///
		mlabel format(%9.3f) mlabposition(2) mlabsize(vsmall) ///
		///subtitle("     Average Treatment Effects", size(10-pt) span bexpand justification(left))  ///
		saving(ran10ate.gph, replace)
	
	graph export figs/ran10ate.eps, replace
	
* estimate ates for all of the villages in the validation data	
meologit collab treat##endline i.actor if train==0 || hhid:
estimates store all, title("All 27 villages")

	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(1))+predict(outcome(2))+predict(outcome(3))) post
	estimates store allate1, title("Pr(y>0)")

	estimates restore all
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(2))+predict(outcome(3))) post
	estimates store allate2, title("Pr(y>1)")

	estimates restore all
	margins, dydx(treat) at(endline==1) over(actor) expression(predict(outcome(3))) post
	estimates store allate3, title("Pr(y>2)")

	coefplot (allate1, ms(circle) label("Pr(Collab > 1)")) ///
		(allate2, ms(D) label("Pr(Collab > 2)")) ///
		(allate3, ms(triangle) label("Pr(Collab = 3)")), ///
		msize(med) legend(size(*.75) position(4) ring(0)) ///
		title({bf:All Validation Sample Villages}, span size(16-pt) justification(left) box bexpand color(white) bcolor(black)) ///
		xline(0) xlab(-.1(.1).4)  levels(95) xsize(5) ysize(5) ///
		mlabel format(%9.3f) mlabposition(2) mlabsize(vsmall) ///
		///subtitle("     Average Treatment Effects", size(10-pt) span bexpand justification(left))  ///
		saving(allate.gph, replace)
	
	graph export figs/allate.eps, replace
	
graph combine top10ate.gph ran10ate.gph allate.gph, cols(1) ysize(10) xsize(5)
graph export figs/fig9.eps, replace

estout top10ate* ran10ate* allate* using tables/priorityates.rtf, replace ///
	cells(b(fmt(3) star) p(par fmt(3))) ///
	mgroups("Top 10" "Random 10" "All Validation Data" , pattern(1 0 0 1 0 0 1 0 0) span) ///
	label stardrop(*) keep(1.treat:) ///
	title("Average Treatment Effects in Validation Data") collabels(none) eqlabels(none) 
	
save analysis_final.dta, replace
